1 2020-02 - NFL Attendance

1.1 DataSet

1.1.1 Download the data: Option 1

attendance <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-04/attendance.csv")
## Parsed with column specification:
## cols(
##   team = col_character(),
##   team_name = col_character(),
##   year = col_double(),
##   total = col_double(),
##   home = col_double(),
##   away = col_double(),
##   week = col_double(),
##   weekly_attendance = col_double()
## )
standings <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-04/standings.csv")
## Parsed with column specification:
## cols(
##   team = col_character(),
##   team_name = col_character(),
##   year = col_double(),
##   wins = col_double(),
##   loss = col_double(),
##   points_for = col_double(),
##   points_against = col_double(),
##   points_differential = col_double(),
##   margin_of_victory = col_double(),
##   strength_of_schedule = col_double(),
##   simple_rating = col_double(),
##   offensive_ranking = col_double(),
##   defensive_ranking = col_double(),
##   playoffs = col_character(),
##   sb_winner = col_character()
## )
games <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-04/games.csv")
## Parsed with column specification:
## cols(
##   year = col_double(),
##   week = col_character(),
##   home_team = col_character(),
##   away_team = col_character(),
##   winner = col_character(),
##   tie = col_character(),
##   day = col_character(),
##   date = col_character(),
##   time = col_time(format = ""),
##   pts_win = col_double(),
##   pts_loss = col_double(),
##   yds_win = col_double(),
##   turnovers_win = col_double(),
##   yds_loss = col_double(),
##   turnovers_loss = col_double(),
##   home_team_name = col_character(),
##   home_team_city = col_character(),
##   away_team_name = col_character(),
##   away_team_city = col_character()
## )

1.1.2 Get the data through the package: Option 2

# Install pkg by devtools
devtools::install_github("thebioengineer/tidytuesdayR")

## Load the data
tuesdata <- tidytuesdayR::tt_load("2020-02-04")
## OR
tuesdata <- tidytuesdayR::tt_load(2020, week = 6)

## Get the individual tables
attendance <- tuesdata$attendance

1.1.3 Getting data locally: Option 3

Check out path and change if necessary

getwd()
## [1] "/home/danicassol/TidyTuesday/R_code"
# setwd('./TidyTuesday/')
dir()
## [1] "g1_plot.gif"       "TidyTuesday_cache"
## [3] "TidyTuesday.Rmd"

Load Data:

attendance <- read.csv(file = "../data/2020/2020-02-04/attendance.csv")
standings <- read.csv(file = "../data/2020/2020-02-04/standings.csv")
games <- read.csv(file = "../data/2020/2020-02-04/games.csv")

1.2 Looking the data

attendance
## # A tibble: 10,846 x 8
##    team  team_name  year  total   home   away  week
##    <chr> <chr>     <dbl>  <dbl>  <dbl>  <dbl> <dbl>
##  1 Ariz… Cardinals  2000 893926 387475 506451     1
##  2 Ariz… Cardinals  2000 893926 387475 506451     2
##  3 Ariz… Cardinals  2000 893926 387475 506451     3
##  4 Ariz… Cardinals  2000 893926 387475 506451     4
##  5 Ariz… Cardinals  2000 893926 387475 506451     5
##  6 Ariz… Cardinals  2000 893926 387475 506451     6
##  7 Ariz… Cardinals  2000 893926 387475 506451     7
##  8 Ariz… Cardinals  2000 893926 387475 506451     8
##  9 Ariz… Cardinals  2000 893926 387475 506451     9
## 10 Ariz… Cardinals  2000 893926 387475 506451    10
## # … with 10,836 more rows, and 1 more variable:
## #   weekly_attendance <dbl>
dim(attendance)
## [1] 10846     8
colnames(attendance)
## [1] "team"              "team_name"        
## [3] "year"              "total"            
## [5] "home"              "away"             
## [7] "week"              "weekly_attendance"
unique(attendance$team)
##  [1] "Arizona"       "Atlanta"       "Baltimore"    
##  [4] "Buffalo"       "Carolina"      "Chicago"      
##  [7] "Cincinnati"    "Cleveland"     "Dallas"       
## [10] "Denver"        "Detroit"       "Green Bay"    
## [13] "Indianapolis"  "Jacksonville"  "Kansas City"  
## [16] "Miami"         "Minnesota"     "New England"  
## [19] "New Orleans"   "New York"      "Oakland"      
## [22] "Philadelphia"  "Pittsburgh"    "San Diego"    
## [25] "San Francisco" "Seattle"       "St. Louis"    
## [28] "Tampa Bay"     "Tennessee"     "Washington"   
## [31] "Houston"       "Los Angeles"
summary(attendance$team)
##    Length     Class      Mode 
##     10846 character character
unique(attendance$year)
##  [1] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010
## [12] 2011 2012 2013 2014 2015 2016 2017 2018 2019
summary(attendance$year)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2000    2005    2010    2010    2015    2019
any(is.na(attendance$weekly_attendance))
## [1] TRUE
str(attendance)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10846 obs. of  8 variables:
##  $ team             : chr  "Arizona" "Arizona" "Arizona" "Arizona" ...
##  $ team_name        : chr  "Cardinals" "Cardinals" "Cardinals" "Cardinals" ...
##  $ year             : num  2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
##  $ total            : num  893926 893926 893926 893926 893926 ...
##  $ home             : num  387475 387475 387475 387475 387475 ...
##  $ away             : num  506451 506451 506451 506451 506451 ...
##  $ week             : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ weekly_attendance: num  77434 66009 NA 71801 66985 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   team = col_character(),
##   ..   team_name = col_character(),
##   ..   year = col_double(),
##   ..   total = col_double(),
##   ..   home = col_double(),
##   ..   away = col_double(),
##   ..   week = col_double(),
##   ..   weekly_attendance = col_double()
##   .. )

1.2.1 Combine tables

combine <- attendance %>% left_join(standings, by = c("team", 
    "team_name", "year"))

1.2.2 Add a column

combine <- combine %>% mutate(full_name = paste(team, team_name, 
    sep = "_"))

1.2.3 Removing a column and row

col_remove <- names(combine) %in% c("full_name")
combine[, !col_remove]
## # A tibble: 10,846 x 20
##    team  team_name  year  total   home   away  week
##    <chr> <chr>     <dbl>  <dbl>  <dbl>  <dbl> <dbl>
##  1 Ariz… Cardinals  2000 893926 387475 506451     1
##  2 Ariz… Cardinals  2000 893926 387475 506451     2
##  3 Ariz… Cardinals  2000 893926 387475 506451     3
##  4 Ariz… Cardinals  2000 893926 387475 506451     4
##  5 Ariz… Cardinals  2000 893926 387475 506451     5
##  6 Ariz… Cardinals  2000 893926 387475 506451     6
##  7 Ariz… Cardinals  2000 893926 387475 506451     7
##  8 Ariz… Cardinals  2000 893926 387475 506451     8
##  9 Ariz… Cardinals  2000 893926 387475 506451     9
## 10 Ariz… Cardinals  2000 893926 387475 506451    10
## # … with 10,836 more rows, and 13 more variables:
## #   weekly_attendance <dbl>, wins <dbl>, loss <dbl>,
## #   points_for <dbl>, points_against <dbl>,
## #   points_differential <dbl>, margin_of_victory <dbl>,
## #   strength_of_schedule <dbl>, simple_rating <dbl>,
## #   offensive_ranking <dbl>, defensive_ranking <dbl>,
## #   playoffs <chr>, sb_winner <chr>
row_remove <- combine$team %in% "Arizona"
combine[!row_remove, ]
## # A tibble: 10,506 x 21
##    team  team_name  year  total   home   away  week
##    <chr> <chr>     <dbl>  <dbl>  <dbl>  <dbl> <dbl>
##  1 Atla… Falcons    2000 964579 422814 541765     1
##  2 Atla… Falcons    2000 964579 422814 541765     2
##  3 Atla… Falcons    2000 964579 422814 541765     3
##  4 Atla… Falcons    2000 964579 422814 541765     4
##  5 Atla… Falcons    2000 964579 422814 541765     5
##  6 Atla… Falcons    2000 964579 422814 541765     6
##  7 Atla… Falcons    2000 964579 422814 541765     7
##  8 Atla… Falcons    2000 964579 422814 541765     8
##  9 Atla… Falcons    2000 964579 422814 541765     9
## 10 Atla… Falcons    2000 964579 422814 541765    10
## # … with 10,496 more rows, and 14 more variables:
## #   weekly_attendance <dbl>, wins <dbl>, loss <dbl>,
## #   points_for <dbl>, points_against <dbl>,
## #   points_differential <dbl>, margin_of_victory <dbl>,
## #   strength_of_schedule <dbl>, simple_rating <dbl>,
## #   offensive_ranking <dbl>, defensive_ranking <dbl>,
## #   playoffs <chr>, sb_winner <chr>, full_name <chr>

1.3 Filter the data

combine <- combine %>% group_by(full_name, year) %>% mutate(avg_home_att = round(mean(home/8), 
    0)) %>% ungroup()

att_home <- combine %>% select(full_name, year, avg_home_att, 
    playoffs) %>% distinct()

att_home_summary <- att_home %>% group_by(full_name, playoffs) %>% 
    mutate(avg_home_att_poff = round(mean(avg_home_att), 0)) %>% 
    ungroup() %>% select(full_name, playoffs, avg_home_att_poff) %>% 
    distinct()

1.4 2019 NFL pts_win and pts_loss Comparison (Home team

library(RColorBrewer)
n <- length(unique(games$home_team))
qual_col_pals <- brewer.pal.info[brewer.pal.info$category == 
    "qual", ]
nflcol <- unlist(mapply(brewer.pal, qual_col_pals$maxcolors, 
    rownames(qual_col_pals)))
# pie(rep(1,n), col=sample(nflcol, n))

plotdat <- games %>% filter(year == 2019) %>% group_by(home_team_name) %>% 
    summarise(pts_win = mean(pts_win, na.rm = TRUE), pts_loss = mean(pts_loss, 
        na.rm = TRUE)) %>% arrange(pts_win) %>% mutate(home_team_name = factor(home_team_name, 
    levels = .$home_team_name))

ggplot(plotdat, aes(x = pts_win, y = pts_loss, col = home_team_name, 
    label = home_team_name)) + geom_text(size = 3) + scale_color_manual(values = nflcol) + 
    guides(col = FALSE) + theme_light() + labs(x = "Mean pts_win", 
    y = "Mean pts_loss", title = "2019 NFL pts_win and pts_loss Comparison (Home team)")

1.5 Animation Plot

Code based on link

top <- attendance %>% filter(!is.na(weekly_attendance)) %>% group_by(team_name) %>% 
    summarise(n = sum(weekly_attendance)) %>% top_n(4)
## Selecting by n
df <- attendance %>% # filter(!is.na(weekly_attendance)) %>%
filter(team_name %in% top$team_name)

g <- ggplot(df, aes(x = year, y = as.factor(week))) + scale_x_continuous(position = "top") + 
    scale_fill_paletteer_c("grDevices::Greens", direction = -1) + 
    geom_tile(data = subset(df, !is.na(weekly_attendance)), aes(fill = weekly_attendance), 
        color = "grey12") + geom_tile(data = subset(df, is.na(weekly_attendance)), 
    fill = "grey20", color = "grey12") + facet_wrap(~team_name, 
    nrow = 2, strip.position = "bottom", scales = "free") + labs(title = "Weekly Attendance", 
    subtitle = "Top 4", x = "Year", y = "Week", fill = "Rate", 
    caption = "Data: 'NFL Attendance'") + theme(panel.grid = element_blank(), 
    axis.ticks.y = element_line(color = "grey76"), legend.position = "none", 
    legend.background = element_rect(fill = "grey10"), legend.key.size = unit(1.5, 
        "cm"), panel.background = element_rect(fill = "grey10", 
        color = "grey10"), plot.background = element_rect(fill = "grey10"), 
    strip.background = element_rect(fil = "grey20"), panel.spacing = unit(2, 
        "lines"), plot.title = element_text(size = 28, color = "grey76", 
        hjust = 0.5), plot.subtitle = element_text(size = 20, 
        color = "grey76", hjust = 0.5), plot.caption = element_text(size = 14, 
        color = "grey76", hjust = 0.99), axis.text = element_text(family = "Roboto Mono", 
        size = 14, colour = "grey76"), strip.text.x = element_text(family = "Roboto Mono", 
        size = 14, colour = "grey76"), axis.title = element_text(family = "Roboto Mono", 
        size = 20, colour = "white"), legend.text = element_text(family = "Roboto Mono", 
        size = 10, colour = "grey76"), legend.title = element_text(family = "Roboto Mono", 
        size = 14, colour = "grey76"))
g

1.6 Animation :)

g1 <- g + transition_time(year) + shadow_mark() + enter_recolor()
animate(g1, renderer = gifski_renderer(), height = 800, width = 1000, 
    fps = 10)
anim_save("g1_plot.gif")

Code based on link

plot_data <- combine %>% filter(grepl(pattern = "Chargers|Rams", 
    x = team_name)) %>% filter(year > 2010)

avg_att <- plot_data %>% group_by(team, team_name, year) %>% 
    summarise(avg_weekly_atd = round(x = mean(x = weekly_attendance, 
        na.rm = TRUE), digits = 0)) %>% ungroup()

att_th <- 90000
top_att <- plot_data %>% filter(weekly_attendance > att_th) %>% 
    select(weekly_attendance)

top_att_games <- data.frame(team = character(), team_name = character(), 
    year = double(), week = double(), weekly_attendance = double())
for (i in seq(from = 1, to = nrow(top_att), by = 1)) {
    top_att_games <- rbind(top_att_games, data.frame(combine %>% 
        filter(weekly_attendance == top_att$weekly_attendance[i]) %>% 
        select(team, team_name, year, week, weekly_attendance) %>% 
        arrange(desc(weekly_attendance)) %>% mutate(opp_team = case_when(weekly_attendance == 
        lag(x = weekly_attendance, n = 1) ~ lag(x = team, n = 1), 
        weekly_attendance == lead(x = weekly_attendance, n = 1) ~ 
            lead(x = team, n = 1))) %>% mutate(opp_team_name = case_when(weekly_attendance == 
        lag(x = weekly_attendance, n = 1) ~ lag(x = team_name, 
        n = 1), weekly_attendance == lead(x = weekly_attendance, 
        n = 1) ~ lead(x = team_name, n = 1))) %>% filter(grepl(pattern = "Chargers|Rams", 
        x = team_name))))
}

Generate the Plot

plot <- ggplot(data = plot_data) + geom_point(mapping = aes(x = week, 
    y = weekly_attendance, col = team), shape = 1) + geom_hline(mapping = aes(yintercept = avg_weekly_atd, 
    col = team), linetype = 2, data = avg_att) + geom_text_repel(mapping = aes(x = 10, 
    y = 35000, label = paste("Average Weekly Attendance: ", avg_weekly_atd)), 
    family = "Bahnschrift", size = 3, data = avg_att, seed = 1008, 
    segment.alpha = 0.4) + geom_text_repel(mapping = aes(x = week, 
    y = weekly_attendance, label = paste("vs: ", opp_team_name)), 
    family = "Bahnschrift", size = 2.75, data = top_att_games, 
    seed = 1008, segment.alpha = 0.4) + facet_wrap(facets = ~team_name) + 
    ggthemes::theme_tufte(base_size = 12, base_family = "Bahnschrift") + 
    labs(x = "Game Week", y = "Attendance", col = "Location: ", 
        title = "Weekly NFL Attendances for Chargers and Rams Games, Year: {closest_state}", 
        subtitle = "Moving to LA was Deterimental for Attendances at Chargers Games \n and Beneficial for Attendances at Rams Games (Considering 2011 - 2019)", 
        caption = "Tidy Tuesday 2020, Week 6  |  Data from Pro Football Reference  |  @d73mwf") + 
    theme(legend.position = "top") + transition_states(states = year, 
    transition_length = 1, state_length = 5, wrap = FALSE) + 
    enter_fade() + exit_fade()

animate(plot = plot, fps = 20, duration = 10, end_pause = 3, 
    width = 750, height = 500)

anim_save(animation = last_animation(), filename = "g2_plot.gif")

2 Version Information

sessionInfo()
## R Under development (unstable) (2019-12-19 r77606)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 9 (stretch)
## 
## Matrix products: default
## BLAS:   /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets 
## [6] methods   base     
## 
## other attached packages:
##  [1] RColorBrewer_1.1-2 extrafont_0.17    
##  [3] gifski_0.8.6       magrittr_1.5      
##  [5] gganimate_1.0.5    paletteer_1.0.0   
##  [7] forcats_0.4.0      stringr_1.4.0     
##  [9] dplyr_0.8.4        purrr_0.3.3       
## [11] readr_1.3.1        tidyr_1.0.2       
## [13] tibble_2.1.3       ggplot2_3.2.1     
## [15] tidyverse_1.3.0    BiocStyle_2.15.6  
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-143        fs_1.3.1           
##  [3] lubridate_1.7.4     progress_1.2.2     
##  [5] httr_1.4.1          tools_4.0.0        
##  [7] backports_1.1.5     utf8_1.1.4         
##  [9] R6_2.4.1            DBI_1.1.0          
## [11] lazyeval_0.2.2      colorspace_1.4-1   
## [13] withr_2.1.2         tidyselect_1.0.0   
## [15] prettyunits_1.1.1   curl_4.3           
## [17] compiler_4.0.0      extrafontdb_1.0    
## [19] cli_2.0.1           rvest_0.3.5        
## [21] formatR_1.7         xml2_1.2.2         
## [23] prismatic_0.2.0     labeling_0.3       
## [25] bookdown_0.17       jcolors_0.0.4      
## [27] scales_1.1.0        digest_0.6.25      
## [29] rmarkdown_2.1       dichromat_2.0-0    
## [31] pkgconfig_2.0.3     htmltools_0.4.0    
## [33] oompaBase_3.2.9     scico_1.1.0        
## [35] dbplyr_1.4.2        maps_3.3.0         
## [37] palr_0.2.0          rlang_0.4.4        
## [39] readxl_1.3.1        pals_1.6           
## [41] rstudioapi_0.11     farver_2.0.3       
## [43] generics_0.0.2      jsonlite_1.6.1     
## [45] Rcpp_1.0.3          munsell_0.5.0      
## [47] fansi_0.4.1         lifecycle_0.1.0    
## [49] stringi_1.4.6       yaml_2.2.1         
## [51] grid_4.0.0          crayon_1.3.4       
## [53] lattice_0.20-38     haven_2.2.0        
## [55] mapproj_1.2.7       hms_0.5.3          
## [57] knitr_1.28          pillar_1.4.3       
## [59] codetools_0.2-16    reprex_0.3.0       
## [61] glue_1.3.1          evaluate_0.14      
## [63] BiocManager_1.30.10 modelr_0.1.6       
## [65] vctrs_0.2.3         tweenr_1.0.1       
## [67] Rttf2pt1_1.3.8      cellranger_1.1.0   
## [69] gtable_0.3.0        rematch2_2.1.0     
## [71] assertthat_0.2.1    xfun_0.12          
## [73] broom_0.5.4         cluster_2.1.0

3 References